from lxml import etree

# Sample HTML markup. The active code below does not reference it; it can be
# parsed straight from memory instead of from a file, e.g.:
#     html = etree.HTML(text)
#     print(etree.tostring(html).decode('utf-8'))
# NOTE(review): the last <li> has no closing tag -- presumably deliberate
# (malformed input for the HTML parser); confirm before "fixing" it.
text = """
<div>
<ul>
<li class="item-1"><a href="link-1.html">first item</a></li>
<li class="item-2"><a href="link-2.html">second item</a></li>
<li class="item-inactive"><a href="link-3.html">third item</a></li>
<li class="item-1"><a href="link-4.html">fourth item</a></li>
<li class="item-0"><a href="link-5.html">fifth item</a>
</ul>
</div>
"""

# Load the document from disk using an HTML parser rather than the default
# XML parser. The path is relative to the current working directory.
html_parser = etree.HTMLParser()
html = etree.parse('./test.html', html_parser)

# Collect the href attribute of every <a> element that is a direct child of
# an <li> element, anywhere in the document, then show the resulting list.
result = html.xpath("//li/a/@href")
print(result)
